home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
vol_400
/
425_01
/
tar
/
deflate.c
< prev
next >
Wrap
Text File
|
1980-07-23
|
30KB
|
802 lines
/*
The following sorce code is derived from Info-Zip 'zip' 2.01
distribution copyrighted by Mark Adler, Richard B. Wales,
Jean-loup Gailly, Kai Uwe Rommel, Igor Mandrichenko and John Bush.
*/
/*
* deflate.c is initially written by Jean-loup Gailly.
*
* PURPOSE
*
* Identify new text as repetitions of old text within a fixed-
* length sliding window trailing behind the new text.
*
* DISCUSSION
*
* The "deflation" process depends on being able to identify portions
* of the input text which are identical to earlier input (within a
* sliding window trailing behind the input currently being processed).
*
* The most straightforward technique turns out to be the fastest for
* most input files: try all possible matches and select the longest.
* The key feature of this algorithm is that insertions into the string
* dictionary are very simple and thus fast, and deletions are avoided
* completely. Insertions are performed at each input character, whereas
* string matches are performed only when the previous match ends. So it
* is preferable to spend more time in matches to allow very fast string
* insertions and avoid deletions. The matching algorithm for small
* strings is inspired from that of Rabin & Karp. A brute force approach
* is used to find longer strings when a small match has been found.
* A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
* (by Leonid Broukhis).
* A previous version of this file used a more sophisticated algorithm
* (by Fiala and Greene) which is guaranteed to run in linear amortized
* time, but has a larger average cost, uses more memory and is patented.
* However the F&G algorithm may be faster for some highly redundant
* files if the parameter max_chain_length (described below) is too large.
*
* ACKNOWLEDGEMENTS
*
* The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
* I found it in 'freeze' written by Leonid Broukhis.
* Thanks to many info-zippers for bug reports and testing.
*
* REFERENCES
*
* APPNOTE.TXT documentation file in PKZIP 1.93a distribution.
*
* A description of the Rabin and Karp algorithm is given in the book
* "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
*
* Fiala,E.R., and Greene,D.H.
* Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
*
* INTERFACE
*
* int lm_init(void)
* Initialize the "longest match" routines for a new file
*
* int fast_deflate(char*, unsigned)
* int lazy_deflate(char*, unsigned)
* Processes a new input file and return its compressed length. Sets
* the compressed length, crc, deflate flags and internal file
* attributes.
*/
#include <stdio.h>
#include "modern.h"
#include "zalloc.h"
#include "zippipe.h"
#include "zipdefs.h"
#include "zipguts.h"
#ifdef MODERN
# include <string.h>
#else
# ifdef pyr /* Pyramid */
# define ZMEM /* No mem???() functions at all */
# endif
#endif
/* Define this symbol if your target allows access to unaligned data.
* This is not mandatory, just a speed optimization. The compressed
* output is strictly identical.
*/
#ifndef UNALIGNED_OK
# ifdef MSDOS
# ifndef WIN32
# define UNALIGNED_OK
# endif
# endif
# ifdef i386
# define UNALIGNED_OK
# endif
# ifdef mc68020
# define UNALIGNED_OK
# endif
# ifdef vax
# define UNALIGNED_OK
# endif
#endif
/* Compile with MEDIUM_MEM to reduce the memory requirements or
* with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
* entire input file can be held in memory (not possible on 16 bit systems).
* Warning: defining these symbols affects HASH_BITS (see below) and thus
* affects the compression ratio. The compressed output
* is still correct, and might even be smaller in some cases.
*/
#ifdef SMALL_MEM
# define HASH_BITS 13 /* Number of bits used to hash strings */
#endif
#ifdef MEDIUM_MEM
# define HASH_BITS 14
#endif
#ifndef HASH_BITS
# define HASH_BITS 15
/* For portability to 16 bit machines, do not use values above 15. */
#endif
#define HASH_SIZE (unsigned)(1<<HASH_BITS)
#define HASH_MASK (HASH_SIZE-1)
#define WMASK (WSIZE-1)
/* HASH_SIZE and WSIZE must be powers of two */
#define NIL 0
/* Tail of hash chains */
#ifndef TOO_FAR
# define TOO_FAR 4096
#endif
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
#ifdef ATARI_ST
# undef MSDOS /* avoid the processor specific parts */
/* (but the Atari should never define MSDOS anyway ...) */
#endif
#ifdef MSDOS
# ifndef NO_ASM
# ifndef ASMV
# define ASMV
# endif
# endif
#endif
#ifdef ASMV
# ifndef MSDOS
# ifdef DYN_ALLOC
#error: DYN_ALLOC not yet supported in match.s
# endif
# endif
#endif
#ifdef MSDOS
# ifndef __32BIT__
# define MAXSEG_64K
# endif
#endif
/* Local data used by the "longest match" routines. */
#if defined(BIG_MEM) || defined(MMAP)
typedef unsigned Pos; /* must be at least 32 bits */
#else
typedef ush Pos;
#endif
typedef unsigned IPos;
/* A Pos is an index in the character window. We use short instead of int to
* save space in the various tables. IPos is used only for parameter passing.
*/
#ifndef DYN_ALLOC
uch window[2L*WSIZE];
/* Sliding window. Input bytes are read into the second half of the window,
* and move to the first half later to keep a dictionary of at least WSIZE
* bytes. With this organization, matches are limited to a distance of
* WSIZE-MAX_MATCH bytes, but this ensures that IO is always
* performed with a length multiple of the block size. Also, it limits
* the window size to 64K, which is quite useful on MSDOS.
* To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would
* be less efficient since the data would have to be copied WSIZE/BSZ times)
*/
Pos prev[WSIZE];
/* Link to older string with same hash index. To limit the size of this
* array to 64K, this link is maintained only for the last 32K strings.
* An index in this array is thus a window index modulo 32K.
*/
Pos head[HASH_SIZE];
/* Heads of the hash chains or NIL. If your compiler thinks that
* HASH_SIZE is a dynamic value, recompile with -DDYN_ALLOC.
*/
#else
uch far *window = NULL;
Pos far *prev = NULL;
Pos far *head = NULL;
#endif
#ifndef zalloc
void far *p_win = NULL;
void far *p_prev = NULL;
void far *p_head = NULL;
#else
# define p_win window
# define p_prev prev
# define p_head head
#endif
#define WINDOW_SIZE ((ulg)2*WSIZE)
long block_start;
/* window position at the beginning of the current output block. Gets
* negative when the window is moved backwards. */
static unsigned ins_h; /* hash index of string to be inserted */
static char uptodate; /* hash preparation flag */
#define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
/* Number of bits by which ins_h and del_h must be shifted at each
* input step. It must be such that after MIN_MATCH steps, the oldest
* byte no longer takes part in the hash key, that is:
* H_SHIFT * MIN_MATCH >= HASH_BITS */
unsigned int prev_length;
/* Length of the best match at previous step. Matches not greater than this
* are discarded. This is used in the lazy match evaluation. */
unsigned strstart; /* start of string to insert */
unsigned match_start; /* start of matching string */
static unsigned lookahead; /* number of valid bytes ahead in window */
unsigned minlookahead;
unsigned max_chain_length;
/* To speed up deflation, hash chains are never searched beyond this length.
* A higher limit improves compression ratio but degrades the speed. */
static unsigned int max_lazy_match;
/* Attempt to find a better match only when the current match is strictly
* smaller than this value. This mechanism is u